 # Simulation Washout
## Show that data from aggregated preferences wash out 

## Define an individual digit preference as a benford distribution where some digits
## are replaced with a preferred digit

digitpreference <- function(n){
    # For sample size n, start with benford numbers
    mantissa  = exp(runif(n)*log(10))
    power = sample(seq(3,7),n, replace = T)
    benford = floor(mantissa *(10^power))

    # Pick 2 digits to prefer, for the whole dataset for this creator
    x <- sample(0:9, 2)

    # With 20% probability for each obs, replace a random digit in the number
    # for a preferred digit
    for(i in 1:n){
      if(runif(1) < 0.2){
        # Turn number into string
        num = toString(benford[i])
        # Find position to replace
        pos = sample(1:nchar(num), 1)
        
        replace = sample(x, 1)
        
        substr(num, pos, pos) = toString(replace)
        benford[i] = num
      } 
    }
    return(benford)
}

## Each district generates its own data with digit preferences
N <- 1000
districts <- c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
D = data.frame(matrix(ncol = 0, nrow = 0))
for(d in districts){
  tmp <- data.frame(matrix(ncol = 0, nrow = N))
  tmp$Expense <- digitpreference(N) 
  tmp$District <- d 
  D<- rbind(D, tmp)
}


#Run our tests against it
#library(devtools)
#install_github("jlederluis/digitanalysis", force = TRUE)

library(digitanalysis)
library(ggplot2)

Data <- process_digit_data(raw_df = D, digit_columns = c('Expense'))

#######################################
# Plot toggle: turn on before plotting
plot_toggle = TRUE 

# First digit test
first_digit = all_digits_test(digitdata = Data, data_columns = 'Expense', digit_places = 1, omit_05 = 0, break_out='District',
                              suppress_first_division_plots=TRUE, plot=plot_toggle)
first_digit$p_values
first_digit$plot$AllBreakout


#All digits test except first with expenditure
all_digits = all_digits_test(digitdata = Data, data_columns = 'Expense', skip_first_digit = TRUE, omit_05 = c(0,5), break_out='District',
                            suppress_first_division_plots=TRUE, plot=plot_toggle)
all_digits$plots$AllBreakout
all_digits$p_values


### Make figs

pdf("FirstDigits.pdf")
first_digit$plot$AllBreakout
dev.off()

pdf("FirstDigits-C.pdf")
first_digit$plot$C
dev.off()

pdf("AllDigits.pdf")
all_digits$plot$AllBreakout
dev.off()


# P-Value printout to produce final table

sink(file = "P-Values.txt")

print("First Digits")
first_digit$p_values


print("AllDigits")
all_digits$p_values

sink()


